This file contains the compression and decompression algorithms from the source
code to ARC as distributed by SEA. This file contains:
ARCPACK.C, ARCUNP.C, ARCSQ.C, ARCUSQ.C, and ARCLZW.C.

/*  ARC - Archive utility - ARCPACK

$define(tag,$$segment(@1,$$index(@1,=)+1))#
$define(version,Version $tag(
TED_VERSION DB =3.37), created on $tag(
TED_DATE DB =02/03/86) at $tag(
TED_TIME DB =22:58:01))#
$undefine(tag)#
    $version

(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED

    By:  Thom Henderson

    Description:
	 This file contains the routines used to compress a file
	 when placing it in an archive.

    Programming notes:
	 September 22 1986  Phil Suematsu
	      Modified for Xenix System V Release 2
	      Modified for Microsoft C 3.0 under PC-DOS 3.0

    Language:
	 Computer Innovations Optimizing C86
*/
#include <stdio.h>
#include "arc.h"

/* stuff for non-repeat packing
 *
 * The packer keeps its run-collapsing state in file-scope variables:
 * pack() resets `state` to NOHIST before each pass over the input and
 * getc_ncr() advances it from call to call.
 */

#define DLE 0x90			/* repeat sequence marker */

static unsigned char	state;		/* current packing state */

/* non-repeat packing states */

#define NOHIST		0		/* don't consider previous input*/
#define SENTCHAR	1		/* lastchar set, no lookahead yet */
#define SENDNEWC	2		/* run over, send new char next */
#define SENDCNT		3		/* newchar set, send count next */

/* packing results -- both are updated by getch() for every byte it reads */

static long	stdlen; 		/* length for standard packing */
static int	crcval; 		/* CRC check value */

/*  Pack one file into the archive, using whichever method yields the
    fewest bytes.

    Unless the global `nocomp` is set, a first pass reads the whole input
    through getc_ncr(), counting the run-collapsed length (ncrlen), feeding
    every byte to the squeeze scanner (scan_sq) and to the cruncher
    (putc_cm, writing to the temporary file `crn`).  getch(), called by
    getc_ncr(), accumulates the raw length (stdlen) and the CRC (crcval)
    as a side effect.  The input is then rewound and the shortest of
    stdlen / ncrlen / huflen / lzwlen selects the method, recorded in the
    global `hdrver` (2 = stored, 3 = packed, 4 = squeezed, 8 = crunched).

    f    - source file, must be seekable (it is rewound with fseek)
    t    - archive being written
    hdr  - header to fill in: crc, length and size are set here

    The function is declared without a type and returns no value.

    NOTE(review): the result of fopen() for the crunch temporary is not
    checked before `crn` is handed to init_cm()/putc_cm(); whether those
    tolerate a NULL stream is not visible in this file section.  The later
    `if (crn) ... else re-crunch` suggests a missing temp file is expected.
    NOTE(review): the sprintf() into tnam is unbounded; a long `arctemp`
    would overrun the buffer.
*/
pack(f, t, hdr) 			/* pack file into an archive */
FILE		*f, *t;			/* source, destination */
struct heads	*hdr;			/* pointer to header data */
{
	int	c;			/* one character of stream */
	long	ncrlen; 		/* length after packing */
	long	huflen; 		/* length after squeezing */
	long	lzwlen; 		/* length after crunching */
	long	pred_sq(), file_sq();	/* stuff for squeezing */
	long	pred_cm();		/* dynamic crunching cleanup */
#if MACRO				/* + Suematsu */
	char	tnam[$strlen];		/* temporary name buffer */
#else
	char	tnam[ARCSTRLEN];	/* temporary name buffer */
#endif					/* - Suematsu */
	char	*makefnam();		/* filename fixer upper */
	FILE	*crn = NULL;		/* temporary crunch file */

	/* first pass - see which method is best */

	if (!nocomp) {			/* if storage kludge not active */
		if (note)
			printf(" analyzing, ");

		if (arctemp)		/* use temp area if specified */
			sprintf(tnam, "%s$ARCTEMP.CRN", arctemp);
		else
			makefnam("$ARCTEMP.CRN", arcname, tnam);
		/* open the temp file read/write; the mode string differs per
		   compiler, and crn stays NULL if none of these is defined */
#if LATTICE				/* + Suematsu */
		crn = fopen(tnam, "wrb");
#endif
#if MSC
		crn = fopen(tnam, "wb+");
#endif
#if XENIX
		crn = fopen(tnam, "w+");
#endif					/* - Suematsu */
		state = NOHIST; 	/* initialize ncr packing */
		stdlen =  ncrlen = 0;	/* reset size counters */
		crcval = 0;		/* initialize CRC check value */
		setcode();		/* initialize encryption */

		init_cm(f, crn);	/* initialize for crunching */
		init_sq();		/* initialize for squeeze scan */

		while ((c = getc_ncr(f)) != EOF) {	/* for each byte of file */
			ncrlen++;		/* one more packed byte */
			scan_sq(c);		/* see what squeezing can do */
			putc_cm(c, crn);	/* see what crunching can do */
		}
		huflen = pred_sq();		/* finish up after squeezing */
		lzwlen = pred_cm(crn);		/* finish up after crunching */
	} else {				/* else kludge the method */
		stdlen = 0;			/* make standard look best */
		ncrlen = huflen = lzwlen = 1;
	}

	/* standard set-ups common to all methods */

	fseek(f, 0L, 0);		/* rewind input */
	hdr->crc = crcval;		/* note CRC check value */
	hdr->length = stdlen;		/* set actual file length */
	state = NOHIST; 		/* reinitialize ncr packing */
	setcode();			/* reinitialize encryption */

	/* choose and use the shortest method */

	if (stdlen <= ncrlen && stdlen <= huflen && stdlen <= lzwlen) {
		if (kludge)		/*DEBUG*/
			printf("(%ld) ", lzwlen - stdlen);
		if (note)
			printf("storing, ");	/* store without compression */
		hdrver = 2;			/* note packing method */
		/* in the nocomp case no first pass ran, so the CRC and length
		   are computed here by getch() while copying */
		stdlen = crcval = 0;		/* recalc these for kludge */
		while ((c = getch(f)) != EOF)	/* store it straight */
			putc_pak(c, t);
		hdr->crc = crcval;
		hdr->length = hdr->size = stdlen;
	} else if (ncrlen < huflen && ncrlen < lzwlen) {
		if (kludge)			/*DEBUG*/
			printf("(%ld) ", lzwlen - ncrlen);
		if (note)
			printf("packing, ");	/* pack with repeat suppression */
		hdrver = 3;			/* note packing method */
		hdr->size = ncrlen;		/* set data length */
		while ((c = getc_ncr(f)) != EOF)
			putc_pak(c, t);
	} else if (huflen < lzwlen) {
		if (kludge)			/*DEBUG*/
			printf("(%ld) ", lzwlen - huflen);
		if (note)
			printf("squeezing, ");
		hdrver = 4;			/* note packing method */
		hdr->size = file_sq(f, t);	/* note final size */
	} else {
		if (kludge)			/*DEBUG*/
			printf("(%ld) ", huflen - lzwlen);
		if (note)
			printf("crunching, ");
		hdrver = 8;
		hdr->size = lzwlen;		/* size should not change */
		if (crn) {			/* if temp was created */
			/* MSC and XENIX close and reopen the temp file for
			   reading; LATTICE just seeks back to its start */
#if MSC|XENIX					/* + Suematsu */
			fclose(crn);
# if !XENIX
			if (!(crn = fopen(tnam, "rb")))
# else
			if (!(crn = fopen(tnam, "r")))
# endif
			{
				printf("Cannot open temporary file %s\n", tnam);
				exit(1);
			}
#endif
#if LATTICE
			fseek(crn, 0L, 0);	/* then copy over crunched temp */
#endif						/* - Suematsu */
			while ((c = fgetc(crn)) != EOF)
				putc_tst(c, t);
		} else {			/* else re-crunch */
			init_cm(f, t);
			while ((c = getc_ncr(f)) != EOF)
				putc_cm(c, t);
			pred_cm(t);		/* finish up after crunching */
		}
	}

	/* standard cleanups common to all methods */

	if (crn) {			/* get rid of crunch temporary */
		fclose(crn);
		if (unlink(tnam) && warn) {
			printf("Cannot delete temporary file %s\n", tnam);
			nerrs++;
		}
	}
	if (note)
		printf("done.\n");
}


/*  Non-repeat compression - text is passed through normally, except that
    a run of more than two is encoded as:

	 <char> <DLE> <count>

    Special case: a count of zero indicates that the DLE is really a DLE,
    not a repeat marker.
*/

int	getc_ncr(f)			/* get bytes with collapsed runs */
FILE *f;				/* file to get from */
{
	static int	prev;		/* value handed out on the last call */
	static int	runlen; 	/* how many times prev was seen in a row */
	static int	pending;	/* byte read ahead but not yet returned */

	/* A byte has just been returned: look ahead to see whether it repeats. */
	if (state == SENTCHAR) {
		if (prev == DLE) {
			/* a literal DLE is always followed by a zero count */
			state = NOHIST;
			return 0;
		}
		if (prev == EOF)	/* once at end of file, stay there */
			return EOF;

		/* count the run; one byte past the run is always consumed
		   and kept in `pending`, and the count is capped at 255 */
		runlen = 1;
		while ((pending = getch(f)) == prev && runlen < 255)
			runlen++;

		if (runlen == 1) {	/* no repeat: the lookahead is the next byte */
			prev = pending;
			return prev;
		}
		if (runlen == 2) {	/* too short to encode: repeat it literally */
			state = SENDNEWC;
			return prev;
		}
		state = SENDCNT;	/* real run: marker now, count next call */
		return DLE;
	}

	/* Nothing relevant has been seen yet: just fetch a byte. */
	if (state == NOHIST) {
		state = SENTCHAR;
		prev = getch(f);
		return prev;
	}

	/* The run is finished: release the byte that ended it. */
	if (state == SENDNEWC) {
		state = SENTCHAR;
		prev = pending;
		return prev;
	}

	/* The DLE marker went out last time: now send the repeat count. */
	if (state == SENDCNT) {
		state = SENDNEWC;
		return runlen;
	}

	/* Any other state value is an internal error. */
#if LATTICE				/* + Suematsu */
	abort("Bug - bad ncr state\n");
#else
	printf("Bug - bad ncr state\n");
	exit(1);
#endif					/* - Suematsu */
}


/*  Read one raw byte from the file being packed.  Every byte actually read
    is folded into crcval and counted in stdlen; EOF is passed back
    untouched and leaves both alone.
*/
static int	getch(f)		/* special get char for packing */
FILE *f;				/* file to get from */
{
	int	byte = fgetc(f);	/* next raw byte, or EOF */

	if (byte == EOF)
		return EOF;

	crcval = addcrc(crcval, byte);
	stdlen++;
	return byte;
}


/*  Write one byte of packed data to the archive.  The byte is first passed
    through code() and the result is written with putc_tst(); both helpers
    are defined outside this part of the file.  No value is returned.
*/
putc_pak(c, f)				/* put a packed byte into archive */
char	c;				/* byte to put */
FILE *f;				/* archive to put it in */
{
	putc_tst(code(c), f);		/* put encoded byte, with checks */
}



/*  ARC - Archive utility - ARCUNP

$define(tag,$$segment(@1,$$index(@1,=)+1))#
$define(version,Version $tag(
TED_VERSION DB =3.16), created on $tag(
TED_DATE DB =02/03/86) at $tag(
TED_TIME DB =23:01:16))#
$undefine(tag)#
    $version

(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED

    By:  Thom Henderson

    Description:
	 This file contains the routines used to expand a file
	 when taking it out of an archive.

    Programming notes:
	 September 22 1986  Phil Suematsu
	      Modified for Xenix System V Release 2
	      Modified for Microsoft C 3.0 under PC-DOS 3.0

    Language:
	 Computer Innovations Optimizing C86
*/
#include <stdio.h>
#include "arc.h"

/* stuff for repeat unpacking
 *
 * unpack() resets all of the variables below at the start of each entry.
 */

#define DLE 0x90			/* repeat byte flag */

static int	state;			/* repeat unpacking state, driven by putc_ncr() */

/* repeat unpacking states */

#define NOHIST 0			/* no relevant history */
#define INREP 1 			/* sending a repeated value */

static int	crcval; 		/* CRC check value, updated by putc_unp() */
static long	size;			/* bytes to read, counted down by getc_unp() */

/*  Expand one archive entry from f into t, using the method recorded in
    the global hdrver.  Returns 0 when the entry unpacked and its CRC
    matches hdr->crc, 1 when the method is unknown (the entry is skipped)
    or the CRC check fails.
*/
int	unpack(f, t, hdr)		/* unpack an archive entry */
FILE *f, *t;				/* source, destination */
struct heads *hdr;			/* pointer to file header data */
{
	int	ch;			/* one byte of the decoded stream */

	/* per-entry setup shared by every method */

	crcval = 0;
	size = hdr->size;
	state = NOHIST;
	setcode();

	/* dispatch on the packing method */

	if (hdrver == 1 || hdrver == 2) {
		/* stored: copy straight through */
		for (ch = getc_unp(f); ch != EOF; ch = getc_unp(f))
			putc_unp(ch, t);
	} else if (hdrver == 3) {
		/* run-length packed only */
		for (ch = getc_unp(f); ch != EOF; ch = getc_unp(f))
			putc_ncr(ch, t);
	} else if (hdrver == 4) {
		/* Huffman squeezed on top of run-length packing */
		init_usq(f);
		for (ch = getc_usq(f); ch != EOF; ch = getc_usq(f))
			putc_ncr(ch, t);
	} else if (hdrver == 5) {
		/* old Lempel-Zev, no run-length layer */
		init_ucr(0);
		for (ch = getc_ucr(f); ch != EOF; ch = getc_ucr(f))
			putc_unp(ch, t);
	} else if (hdrver == 6 || hdrver == 7) {
		/* old Lempel-Zev plus run-length; method 7 uses the new hash */
		init_ucr(hdrver == 7 ? 1 : 0);
		for (ch = getc_ucr(f); ch != EOF; ch = getc_ucr(f))
			putc_ncr(ch, t);
	} else if (hdrver == 8) {
		/* dynamic Lempel-Zev */
		decomp(f, t);
	} else {
		/* method we have never heard of: complain and skip the data */
		if (warn) {
			printf("I don't know how to unpack file %s\n", hdr->name);
			printf("I think you need a newer version of ARC\n");
			nerrs++;
		}
		fseek(f, hdr->size, 1);
		return 1;
	}

	/* verify what was written against the stored CRC */

	if (crcval == hdr->crc)
		return 0;

	if (warn) {
		printf("WARNING: File %s fails CRC check\n", hdr->name);
		nerrs++;
	}
	return 1;
}


/*  This routine is used to put bytes in the output file.  It also
    performs various housekeeping functions, such as maintaining the
    CRC check value.
*/

/*  Write one unpacked byte to the output file: the byte is first folded
    into the running CRC (crcval) and then written with putc_tst().
    No value is returned.

    NOTE(review): c is a plain char, so on compilers where char is signed a
    byte >= 0x80 reaches addcrc() as a negative value, whereas the packer's
    getch() passes 0..255.  Whether addcrc() masks its argument is not
    visible here -- confirm.
*/
static putc_unp(c, t)			/* output an unpacked byte */
char	c;				/* byte to output */
FILE *t;				/* file to output to */
{
	crcval = addcrc(crcval, c);	/* update the CRC check value */
	putc_tst(c, t);
}


/*  This routine is used to decode non-repeat compression.  Bytes are
    passed one at a time in coded format, and are written out uncoded.
    The data is stored normally, except that runs of more than two
    characters are represented as:

	 <char> <DLE> <count>

    With a special case that a count of zero indicates a DLE as data,
    not as a repeat marker.
*/

putc_ncr(c, t)				/* put NCR coded bytes */
unsigned char	c;			/* next byte of stream */
FILE *t;				/* file to receive data */
{
	static int	prev;		/* most recent literal byte written */
	int		extra;		/* copies of prev still to write */

	if (state == NOHIST) {
		/* ordinary byte, unless it is the repeat marker */
		if (c != DLE) {
			prev = c;
			putc_unp(prev, t);
		} else
			state = INREP;	/* the next byte is a count */
		return;
	}

	if (state == INREP) {
		/* c is the count that followed a DLE */
		if (c == 0)
			putc_unp(DLE, t);	/* zero count: the DLE was data */
		else
			/* the first copy was written when the byte itself
			   came through, so only count - 1 remain */
			for (extra = c - 1; extra > 0; extra--)
				putc_unp(prev, t);
		state = NOHIST;
		return;
	}

	/* any other state value is an internal error */
#if LATTICE					/* + Suematsu */
	abort("Bad NCR unpacking state (%d)", state);
#else
	printf("Bad NCR unpacking state (%d)\n", state);
	exit(1);
#endif						/* - Suematsu */
}


/*  This routine provides low-level byte input from an archive.  This
    routine MUST be used, as end-of-file is simulated at the end of
    the archive entry.
*/

/*  Return the next decoded byte of the current archive entry, or EOF once
    the entry's `size` bytes have been consumed.

    A real end of file on the archive (a truncated entry) is also reported
    as EOF.  Previously the EOF from fgetc() was handed to code(); if code()
    alters its argument the caller's `!= EOF` loop would not terminate until
    `size` ran out, writing garbage in the meantime.
*/
int	getc_unp(f)				/* get a byte from an archive */
FILE *f;					/* archive file to read */
{
	int	c;				/* raw byte from the archive */

	if (!size)				/* if no data left */
		return EOF;			/* then pretend end of file */

	size--; 				/* deduct from input counter */

	if ((c = fgetc(f)) == EOF) {		/* archive ended early */
		size = 0;			/* nothing more can be read */
		return EOF;
	}

	return code(c);				/* and return next decoded byte */
}



/*  ARC - Archive utility - ARCSQ

$define(tag,$$segment(@1,$$index(@1,=)+1))#
$define(version,Version $tag(
TED_VERSION DB =3.10), created on $tag(
TED_DATE DB =01/30/86) at $tag(
TED_TIME DB =20:10:46))#
$undefine(tag)#
    $version

(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED

    By:  Thom Henderson

    Description:
	 This file contains the routines used to squeeze a file
	 when placing it in an archive.

    Language:
	 Computer Innovations Optimizing C86

    Programming notes:
	 September 22 1986  Phil Suematsu
	      Modified for Xenix System V Release 2
	      Modified for Microsoft C 3.0 under PC-DOS 3.0

	 Most of the routines used for the Huffman squeezing algorithm
	 were lifted from the SQ program by Dick Greenlaw, as adapted
	 to CI-C86 by Robert J. Beilstein.
*/
#include <stdio.h>
#include "arc.h"			/* Suematsu */

/* stuff for Huffman squeezing */

#define TRUE 1
#define FALSE 0
#define ERROR (-1)
#define SPEOF 256			/* special endfile token */
#define NOCHILD -1			/* marks end of path through tree */
#define NUMVALS 257			/* 256 data values plus SPEOF*/
#define NUMNODES (NUMVALS+NUMVALS-1)	/* number of nodes */
/* NOTE(review): the expansion below is not parenthesized; it is only used
   in simple comparisons and assignments within the code visible here. */
#define MAXCOUNT (unsigned) 65535	/* biggest unsigned integer */

/* The following array of structures are the nodes of the
   binary trees. The first NUMVALS nodes become the leaves of the
   final tree and represent the values of the data bytes being
   encoded and the special endfile, SPEOF.
   The remaining nodes become the internal nodes of the final tree.
*/

struct nd				/* shared by unsqueezer */
{
	unsigned	weight; 	/* number of appearances */
	int		tdepth; 	/* length on longest path in tree */
	int		lchild, rchild; /* indices to next level */
} node[NUMNODES];			/* use large buffer */

static int	dctreehd;		/* index to head of final tree */

/* This is the encoding table:
   The bit strings have first bit in low bit.
   Note that counts were scaled so code fits unsigned integer.
   codelen[] is cleared by init_enc() and both arrays are filled
   in by buildenc(), indexed by data value (0..SPEOF).
*/

static int	codelen[NUMVALS];	/* number of bits in code */
static unsigned code[NUMVALS];		/* code itself, right adjusted */
static unsigned tcode;			/* temporary code value */
static long	valcount[NUMVALS];	/* actual count of times seen */

/* Variables used by encoding process (gethuff); pred_sq() primes them */

static int	curin;			/* value currently being encoded */
static int	cbitsrem;		/* # of code string bits left */
static unsigned ccode;			/* current code right justified */

init_sq()				/* prepare for scanning pass */
{
	int	i;			/* node index */

	/* Initialize all nodes to single element binary trees
	   with zero weight and depth.
	 */

	for (i = 0; i < NUMNODES; ++i) {
		node[i].weight = 0;
		node[i].tdepth = 0;
		node[i].lchild = NOCHILD;
		node[i].rchild = NOCHILD;
	}

	for (i = 0; i < NUMVALS; i++)
		valcount[i] = 0;
}


/*  Record one occurrence of a byte value in the frequency tables.  EOF is
    counted under the SPEOF slot.  The tree weight saturates at MAXCOUNT;
    the long counter in valcount[] is always incremented.
*/
scan_sq(c)				/* add a byte to the tables */
int	c;				/* byte to add */
{
	int	slot;			/* table index for this value */

	slot = (c == EOF) ? SPEOF : c;

	if (node[slot].weight != MAXCOUNT)
		node[slot].weight++;	/* saturating weight */

	valcount[slot]++;		/* exact count */
}


/*  Finish the squeeze scan and predict how many bytes the squeezed entry
    will occupy.

    The end-of-file token is counted, then the Huffman tree and encoding
    table are built, rescaling the weights with a halved ceiling each time
    buildenc() reports a code longer than 16 bits.  The encoder state used
    by gethuff() is primed on the way out.

    Returns the predicted size: the encoded data rounded up to whole bytes
    plus the decode-tree header.  The header is counted at 2 bytes per
    integer, because that is what put_int() writes; using sizeof(int) here
    overstated it on machines where int is wider than 16 bits.
*/
long	pred_sq()			/* predict size of squeezed file */
{
	int		i;
	int		btlist[NUMVALS];/* list of intermediate b-trees */
	int		listlen;	/* length of btlist */
	unsigned	ceiling;	/* limit for scaling */
	long		size = 0;	/* predicted size */
	int		numnodes;	/* # of nodes in simplified tree */

	scan_sq(EOF);			/* signal end of input */

	ceiling = MAXCOUNT;

	/* Keep trying to scale and encode */

	do {
		scale(ceiling);
		ceiling /= 2;		/* in case we rescale */

		/* Build list of single node binary trees having
		   leaves for the input values with non-zero counts
		 */

		for (i = listlen = 0; i < NUMVALS; ++i) {
			if (node[i].weight != 0) {
				node[i].tdepth = 0;
				btlist[listlen++] = i;
			}
		}

		/* Arrange list of trees into a heap with the entry
		   indexing the node with the least weight at the top.
		 */

		heap(btlist, listlen);

		/* Convert the list of trees to a single decoding tree */

		bld_tree(btlist, listlen);

		/* Initialize the encoding table */

		init_enc();

		/* Try to build encoding table.
		   Fail if any code is > 16 bits long.
		 */
	} while (buildenc(0, dctreehd) == ERROR);

	/* Initialize encoding variables */

	cbitsrem = 0;				/* force initial read */
	curin = 0;				/* anything but endfile */

	for (i = 0; i < NUMVALS; i++)		/* add bits for each code */
		size += valcount[i] * codelen[i];

	size = (size + 7) / 8;			/* reduce to number of bytes */

	numnodes = dctreehd < NUMVALS ? 0 : dctreehd - (NUMVALS - 1);

	/* header: node count, then two child entries per interior node,
	   each written by put_int() as exactly 2 bytes */

	size += 2L + 4L * numnodes;

	return size;
}


/* The counts of occurrences of each input value
   have already been prevented from exceeding MAXCOUNT.
   Now we must scale them so that their sum doesn't exceed
   ceiling and yet no non-zero count can become zero.
   This scaling prevents errors in the weights of the
   interior nodes of the Huffman tree and also ensures that
   the codes will fit in an unsigned integer. Rescaling is
   used if necessary to limit the code length.
*/

static scale(ceil)
unsigned	ceil;			/* upper limit on total weight */
{
	register int	idx;		/* leaf index */
	int		overflows;	/* times the running total passed ceil */
	int		divisor;	/* scaling factor being tried */
	unsigned	wt, total;
	unsigned char	bumped;		/* a small weight was raised this pass */

	for (;;) {
		/* Add up the leaf weights, counting each time the running
		   total would pass the ceiling. */
		total = 0;
		overflows = 0;
		for (idx = 0; idx < NUMVALS; ++idx) {
			if (node[idx].weight > (ceil - total))
				++overflows;
			total += node[idx].weight;
		}

		divisor = overflows + 1;

		/* Raise any used weight that the division would wipe out,
		   so every value that occurs still gets a code. */
		bumped = FALSE;
		for (idx = 0; idx < NUMVALS; ++idx) {
			wt = node[idx].weight;
			if (wt != 0 && wt < divisor) {
				node[idx].weight = divisor;
				bumped = TRUE;
			}
		}

		/* raising weights changes the total, so go round again */
		if (!bumped)
			break;
	}

	/* The divisor is settled; apply it if it does anything. */
	if (divisor > 1) {
		for (idx = 0; idx < NUMVALS; ++idx)
			node[idx].weight /= divisor;
	}
}


/* heap() and adjust() maintain a list of binary trees as a
   heap with the top indexing the binary tree on the list
   which has the least weight or, in case of equal weights,
   least depth in its longest path. The depth part is not
   strictly necessary, but tends to avoid long codes which
   might provoke rescaling.
*/

static heap(list, length)
int	list[], length;
{
	register int	parent;		/* heap slot being sifted down */

	/* start at the last slot that has a child and work back to the top */
	parent = (length - 2) / 2;
	while (parent >= 0) {
		adjust(list, parent, length - 1);
		--parent;
	}
}


/* Make a heap from a heap with a new top */

static adjust(list, top, bottom)
int	list[], top, bottom;
{
	register int	child, moved;

	/* Sift the entry at `top` down until neither child is "smaller". */
	for (;;) {
		child = 2 * top + 1;		/* left child of top */
		if (child > bottom)
			return;			/* no children: done */

		moved = list[top];		/* tree being sifted down */

		/* choose the "smaller" of the two children, if there are two */
		if (child < bottom && cmptrees(list[child], list[child + 1]))
			++child;

		/* stop once the parent is no "larger" than that child */
		if (!cmptrees(moved, list[child]))
			return;

		/* swap parent and child, then continue from the child's slot */
		list[top] = list[child];
		list[child] = moved;
		top = child;
	}
}


/* Compare two trees, if a > b return true, else return false.
   Note comparison rules in previous comments.
*/

static cmptrees(a, b)
int	a, b;				/* root nodes of trees */
{
	/* heavier wins; on equal weight the deeper tree counts as larger */
	return node[a].weight > node[b].weight
	    || (node[a].weight == node[b].weight
		&& node[a].tdepth > node[b].tdepth);
}


/* HUFFMAN ALGORITHM: develops the single element trees
   into a single binary tree by forming subtrees rooted in
   interior nodes having weights equal to the sum of weights of all
   their descendents and having depth counts indicating the
   depth of their longest paths.

   When all trees have been formed into a single tree satisfying
   the heap property (on weight, with depth as a tie breaker)
   then the binary code assigned to a leaf (value to be encoded)
   is then the series of left (0) and right (1)
   paths leading from the root to the leaf.
   Note that trees are removed from the heaped list by
   moving the last element over the top element and
   reheaping the shorter list.
*/

static bld_tree(list, len)
int	list[];
int	len;
{
	register int		freenode;	/* next free node in tree */
	register struct nd	*frnp;		/* free node pointer */
	int			lch, rch;	/* temps for left, right children */
	int			i;

	/* Initialize index to next available (non-leaf) node.
	   Lower numbered nodes correspond to leaves (data values).
	 */

	freenode = NUMVALS;

	while (len > 1) {
		/* Take from list two btrees with least weight
		   and build an interior node pointing to them.
		   This forms a new tree.
		 */

		lch = list[0];		/* This one will be left child */

		/* delete top (least) tree from the list of trees */

		list[0] = list[--len];
		adjust(list, 0, len - 1);

		/* Take new top (least) tree. Reuse list slot later */

		rch = list[0];		/* This one will be right child */

		/* Form new tree from the two least trees using
		   a free node as root. Put the new tree in the list.
		 */

		frnp = &node[freenode]; /* address of next free node */
		list[0] = freenode++;	/* put at top for now */
		frnp->lchild = lch;
		frnp->rchild = rch;
		frnp->weight = node[lch].weight + node[rch].weight;
		frnp->tdepth = 1 + maxchar(node[lch].tdepth, node[rch].tdepth);

		/* reheap list	to get least tree at top */

		adjust(list, 0, len - 1);
	}
	dctreehd = list[0];		/* head of final tree */
}


/* Return the larger of two integers. */
static maxchar(a, b)
int	a, b;
{
	if (a > b)
		return a;
	return b;
}


/* Clear the code-length table so that no value has a code assigned. */
static init_enc()
{
	register int	*lp;		/* walks codelen[] */

	for (lp = codelen; lp < codelen + NUMVALS; lp++)
		*lp = 0;
}


/* Recursive routine to walk the indicated subtree and level
   and maintain the current path code in bstree. When a leaf
   is found the entire code string and length are put into
   the encoding table entry for the leaf's data value .

   Returns ERROR if codes are too long.
*/

static int buildenc(level, root)
int	level;		/* level of tree being examined, from zero */
int	root;		/* root of subtree is also data value if leaf */
{
	register int	l, r;
#if MSC|XENIX		/* + Suematsu */
	unsigned	mask;
#endif			/* - Suematsu */

	l = node[root].lchild;
	r = node[root].rchild;

	if (l == NOCHILD && r == NOCHILD) {
		/* Leaf. Previous path determines bit string
		   code of length level (bits 0 to level - 1).
		   Ensures unused code bits are zero.
		 */

		codelen[root] = level;
#if MSC|XENIX	/* + Suematsu */
		code[root] = tcode & ((mask = ~0) >> (16 - level));
#else
		code[root] = tcode & (((unsigned)~0) >> (16 - level));
#endif		/* - Suematsu */
		return (level > 16) ? ERROR : (int) NULL;
	} else {
		if (l != NOCHILD) {
			/* Clear path bit and continue deeper */

			tcode &= ~(1 << level);
			if (buildenc(level + 1, l) == ERROR)
				return ERROR;	/* pass back bad statuses */
		}
		if (r != NOCHILD) {
			/* Set path bit and continue deeper */

			tcode |= 1 << level;
			if (buildenc(level + 1, r) == ERROR)
				return ERROR;	/* pass back bad statuses */
		}
	}
	return (int) NULL;			/* it worked if we reach here */
}


/*  Write an integer to the archive as two bytes, low byte first, via
    putc_pak().  The second call passes n >> 8 and relies on putc_pak()'s
    char parameter to keep only one byte of it, so exactly two bytes are
    written whatever the width of int.  No value is returned.
*/
static put_int(n, f)			/* output an integer */
int	n;				/* integer to output */
FILE *f;				/* file to put it to */
{
	putc_pak(n & 0xff, f);		/* first the low byte */
	putc_pak(n >> 8, f);		/* then the high byte */
}


/* Write out the header of the compressed file */

/*  Write the decode-tree header of a squeezed entry to `ob` and return the
    number of bytes written.

    The header is the interior-node count followed by the left and right
    child of each interior node, head of the tree first.  Every value goes
    out through put_int(), i.e. as exactly 2 bytes, so the returned size
    is 2 + 4 * numnodes.  The original returned a sizeof(int)-based figure,
    which file_sq() passes on as the entry size; where int is wider than
    16 bits that no longer matched the bytes actually written.
*/
static long	wrt_head(ob)
FILE *ob;
{
	register int	l, r;
	int	i, k;
	int	numnodes;		/* # of nodes in simplified tree */

	/* Write out a simplified decoding tree. Only the interior
	   nodes are written. When a child is a leaf index
	   (representing a data value) it is recoded as
	   -(index + 1) to distinguish it from interior indexes
	   which are recoded as positive indexes in the new tree.

	   Note that this tree will be empty for an empty file.
	 */

	numnodes = dctreehd < NUMVALS ? 0 : dctreehd - (NUMVALS - 1);
	put_int(numnodes, ob);

	for (k = 0, i = dctreehd; k < numnodes; ++k, --i) {
		l = node[i].lchild;
		r = node[i].rchild;
		l = l < NUMVALS ? -(l + 1) : dctreehd - l;
		r = r < NUMVALS ? -(r + 1) : dctreehd - r;
		put_int(l, ob);
		put_int(r, ob);
	}

	/* one 2-byte count plus two 2-byte children per interior node */

	return 2L + 4L * numnodes;
}


/* Get an encoded byte or EOF. Reads from specified stream AS NEEDED.

   There are two unsynchronized bit-byte relationships here.
   The input stream bytes are converted to bit strings of
   various lengths via the static variables named c...
   These bit strings are concatenated without padding to
   become the stream of encoded result bytes, which this
   function returns one at a time. The EOF (end of file) is
   converted to SPEOF for convenience and encoded like any
   other input value. True EOF is returned after that.
*/

static int gethuff(ib)			/* Returns bytes except for EOF */
FILE *ib;
{
	int	out = 0;		/* output byte under construction */
	int	need = 8;		/* bits still missing from `out` */

	/* Keep pulling codes until a full byte is assembled.  cbitsrem
	   starts at zero, so the first call goes straight to a read. */
	for (;;) {
		if (cbitsrem >= need) {
			/* the current code can finish the byte */
			if (need == 0)
				return out;

			out |= ccode << (8 - need);
			ccode >>= need;		/* keep the unused bits */
			cbitsrem -= need;
			return out & 0xff;
		}

		/* the current code is too short: use all that is left */
		if (cbitsrem > 0) {
			out |= ccode << (8 - need);
			need -= cbitsrem;
		}

		/* If the end-of-file token was the code just used up, flush
		   a partly filled byte now and report EOF on the next call. */
		if (curin == SPEOF) {
			cbitsrem = 0;
			if (need == 8)
				return EOF;
			return out;
		}

		/* fetch the next input value and load its code */
		curin = getc_ncr(ib);
		if (curin == EOF)
			curin = SPEOF;		/* encoded like any other value */

		ccode = code[curin];
		cbitsrem = codelen[curin];
	}
}


/*  This routine is used to perform the actual squeeze operation.  It can
    only be called after the file has been scanned.  It returns the true
    length of the squeezed entry.
*/

long	file_sq(f, t)			/* squeeze a file into an archive */
FILE *f;				/* file to squeeze */
FILE *t;				/* archive to receive file */
{
	long	written;		/* bytes put into the archive so far */
	int	b;			/* one byte of squeezed output */

	/* the decode tree goes first; its length starts the tally */
	written = wrt_head(t);

	/* then every encoded byte, counting as we go */
	for (b = gethuff(f); b != EOF; b = gethuff(f)) {
		putc_pak(b, t);
		written++;
	}

	return written;			/* true size of the squeezed entry */
}



/*  ARC - Archive utility - ARCUSQ

$define(tag,$$segment(@1,$$index(@1,=)+1))#
$define(version,Version $tag(
TED_VERSION DB =3.13), created on $tag(
TED_DATE DB =01/30/86) at $tag(
TED_TIME DB =20:11:42))#
$undefine(tag)#
    $version

(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED

    By:  Thom Henderson

    Description:
	 This file contains the routines used to expand a file
	 which was packed using Huffman squeezing.

	 Most of this code is taken from an USQ program by Richard
	 Greenlaw, which was adapted to CI-C86 by Robert J. Beilstein.

    Programming notes:
	 September 22 1986  Phil Suematsu
	      Modified for Xenix System V Release 2
	      Modified for Microsoft C 3.0 under PC-DOS 3.0

    Language:
	 Computer Innovations Optimizing C86
*/
#include <stdio.h>
#include "arc.h"

/* stuff for Huffman unsqueezing */

#define ERROR (-1)

#define SPEOF 256			/* special endfile token */
#define NUMVALS 257			/* 256 data values plus SPEOF */

/* NOTE(review): `node` is declared extern here with its own layout for
   struct nd; the squeezer's declaration carries the comment "shared by
   unsqueezer", so this presumably reuses that storage -- confirm. */
extern struct nd			/* decoding tree */
{
	int	child[2];		/* left, right */
} node[NUMVALS];			/* use large buffer */

static int	bpos;			/* last bit position read */
static int	curin;			/* last byte value read */
static int	numnodes;		/* number of nodes in decode tree */

/*  Read a 16-bit signed integer, stored low byte first, from the archive
    entry and return it as an int.

    Two defects in the original one-line expression are fixed:
      - both getc_unp() calls were operands of a single `|`, so the
	compiler was free to read the high byte first;
      - the value was not sign-extended, so where int is wider than 16
	bits the negative leaf codes of the decode tree came back as
	large positive numbers.

    If getc_unp() returns EOF for either byte the result is negative.
*/
static int	get_int(f)		/* get an integer */
FILE *f;				/* file to get it from */
{
	int		lo, hi;		/* the two bytes, in stream order */
	unsigned	v;		/* assembled 16-bit pattern */

	lo = getc_unp(f);		/* separate statements fix the order */
	hi = getc_unp(f);

	v = (unsigned) (lo & 0xff) | ((unsigned) (hi & 0xff) << 8);

	if (v & (unsigned) 0x8000)	/* negative in 16-bit two's complement */
		return -(int) ((unsigned) 0xFFFF - v) - 1;

	return (int) v;
}


/*  Prepare to unsqueeze an entry: reset the bit reader and load the decode
    tree from the head of the squeezed data.  The program terminates if the
    stored node count is negative or does not fit the node table.
*/
init_usq(f)				/* initialize Huffman unsqueezing */
FILE *f;				/* file containing squeezed data */
{
	int	n;			/* node being loaded */

	bpos = 99;			/* forces a byte read on first use */

	numnodes = get_int(f);

	if (numnodes < 0 || numnodes >= NUMVALS) {
#if LATTICE				/* + Suematsu */
		abort("File has an invalid decode tree");
#else
		printf("File has an invalid decode tree\n");
		exit(1);
#endif					/* - Suematsu */
	}

	/* With an empty tree both branches of the root lead to SPEOF. */

	node[0].child[0] = -(SPEOF + 1);
	node[0].child[1] = -(SPEOF + 1);

	/* read the left and right child of each stored node, in order */

	n = 0;
	while (n < numnodes) {
		node[n].child[0] = get_int(f);
		node[n].child[1] = get_int(f);
		++n;
	}
}


int	getc_usq(f)			/* get byte from squeezed file */
FILE *f;				/* file containing squeezed data */
{
	int	i;			/* tree index */

	/* follow bit stream in tree to a leaf */

	for (i = 0; i >= 0; )		/* work down(up?) from root */ {
		if (++bpos > 7) {
			if ((curin = getc_unp(f)) == ERROR)
				return(ERROR);
			bpos = 0;

			/* move a level deeper in tree */
			i = node[i].child[1&curin];
		} else
			i = node[i].child[1 & (curin >>= 1)];
	}

	/* decode fake node index to original data value */

	i = -(i + 1);

	/* decode special endfile token to normal EOF */

	i = (i == SPEOF) ? EOF : i;
	return i;
}




/*  ARC - Archive utility - ARCLZW

$define(tag,$$segment(@1,$$index(@1,=)+1))#
$define(version,Version $tag(
TED_VERSION DB =1.88), created on $tag(
TED_DATE DB =01/20/86) at $tag(
TED_TIME DB =16:47:04))#
$undefine(tag)#
    $version

(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED

    By:  Thom Henderson

    Description:
	 This file contains the routines used to implement Lempel-Zev
	 data compression, which calls for building a coding table on
	 the fly.  This form of compression is especially good for encoding
	 files which contain repeated strings, and can often give dramatic
	 improvements over traditional Huffman SQueezing.

    Language:
	 Computer Innovations Optimizing C86

    Programming notes:
	 September 22 1986  Phil Suematsu
	      Modified for Xenix System V Release 2
	      Modified for Microsoft C 3.0 under PC-DOS 3.0
	 -----
	 In this section I am drawing heavily on the COMPRESS program
	 from UNIX.  The basic method is taken from "A Technique for High
	 Performance Data Compression", Terry A. Welch, IEEE Computer
	 Vol 17, No 6 (June 1984), pp 8-19.  Also see "Knuth's Fundamental
	 Algorithms", Donald Knuth, Vol 3, Section 6.4.

	 As best as I can tell, this method works by tracing down a hash
	 table of code strings where each entry has the property:

	      if <string> <char> is in the table
	      then <string> is in the table.
*/
#include <stdio.h>
#include "arc.h"

/* definitions for older style crunching */

#define FALSE	 0
#define TRUE	 !FALSE
#define TABSIZE  4096
#define NO_PRED  0xFFFF
#define EMPTY	 0xFFFF
#define NOT_FND  0xFFFF

static unsigned int	inbuf;		/* partial input code storage */
static int	sp;			/* current stack pointer */

static struct entry			/* string table entry format */
{
	char	used;			/* true when this entry is in use */
	unsigned int	next;		/* ptr to next in collision list */
	unsigned int	predecessor;	/* code for preceeding string */
	unsigned char	follower;	/* char following string */
} string_tab[TABSIZE];			/* the code string table */


/* definitions for the new dynamic Lempel-Zev crunching */

#define BITS   12			/* maximum bits per code */
#define HSIZE  5003			/* 80% occupancy */
#define INIT_BITS 9			/* initial number of bits/code */

static int	n_bits; 		/* number of bits/code */
static int	maxcode;		/* maximum code, given n_bits */
#define MAXCODE(n)	((1<<(n)) - 1)	/* maximum code calculation */
static int	maxcodemax =  1 << BITS;/* largest possible code (+1) */

/* Staging area shared by putcode() and getcode().  Eight codes of n_bits
 * bits each occupy exactly n_bits bytes, so BITS bytes cover the widest
 * code size.
 */
static char	buf[BITS];		/* input/output buffer */

/* lmask[n] has the low n bits clear and the remaining bits set */
static unsigned char	lmask[9] =	/* left side masks */
{
	0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00
};


/* rmask[n] has only the low n bits set */
static unsigned char	rmask[9] =	/* right side masks */
{
	0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff
};


/* putcode() advances offset by n_bits per code and shifts it right by
 * three to find a byte, so it counts bits, not bytes.
 */
static int		offset; 	/* bit offset into buf for code output */
static long		in_count;	/* length of input */
static long		bytes_out;	/* length of compressed output */
static unsigned int	ent;		/* code for the string matched so far (crunch) */

/* To save much memory (which we badly need at this point), we overlay
 * the table used by the previous version of Lempel-Zev with those used
 * by the new version.	Since no two of these routines will be used
 * together, we can safely do this.  Note that the tables used for Huffman
 * squeezing may NOT overlay these, since squeezing and crunching are done
 * in parallel.
 *
 * htab is cleared as HSIZE longs, so string_tab must be at least
 * HSIZE * sizeof(long) bytes for the overlay to be safe.
 * NOTE(review): confirm that holds for the target's struct layout.
 */

static long		*htab = (long *) string_tab;	/* hash code table   (crunch) */
static unsigned int	codetab[HSIZE];			/* string code table (crunch) */

static unsigned int	*prefix = codetab;		/* prefix code table (uncrunch) */
/* NOTE(review): the cast is to char * although suffix is unsigned char * */
static unsigned char	*suffix = (char *) string_tab;	/* suffix table (uncrunch) */

static int		free_ent;			/* first unused entry */
static int		firstcmp;			/* true at start of compression */
static unsigned char	stack[HSIZE];			/* local push/pop stack */

/*
 * block compression parameters -- after all codes are used up,
 * and compression rate changes, start over.
 */

static int	clear_flg;	/* nonzero: code size must drop back to INIT_BITS */
static long	ratio;		/* best ratio seen so far, 8 fractional bits (see cl_block) */
#define CHECK_GAP 10000 	/* ratio check interval */
static long	checkpoint;	/* in_count value at which the ratio is next checked */

/*
 * the next two codes should not be changed lightly, as they must not
 * lie within the contiguous general code space.
 */
#define FIRST	257		/* first free entry */
#define CLEAR	256		/* table clear output code */
static cl_block(t)		/* table clear for block compress */
FILE *t;			/* our output file */
{
	long int	rat;

	checkpoint = in_count + CHECK_GAP;

	if (in_count > 0x007fffff) {	/* shift will overflow */
		rat = bytes_out >> 8;
		if (rat == 0)		/* Don't divide by zero */
			rat = 0x7fffffff;
		else
			rat = in_count / rat;
	} else
		rat = (in_count << 8) / bytes_out;/* 8 fractional bits */

	if (rat > ratio)
		ratio = rat;
	else
	 {
		ratio = 0;
		setmem(htab, HSIZE * sizeof(long), 0xff);
		free_ent = FIRST;
		clear_flg = 1;
		putcode(CLEAR, t);
	}
}


/*****************************************************************
 *
 * Output a given code.
 * Inputs:
 *	code:	A n_bits-bit integer.  If == -1, then EOF.  This assumes
 *		that n_bits =< (long)wordsize - 1.
 * Outputs:
 *	Outputs code to the file.
 * Assumptions:
 *	Chars are 8 bits long.
 * Algorithm:
 *	Maintain a BITS character long buffer (so that 8 codes will
 * fit in it exactly).	When the buffer fills up empty it and start over.
 */

static putcode(code, t) 		/* output a code */
int	code;				/* code to output; any negative value flushes the buffer */
FILE	*t;				/* where to put it */
{
	int	r_off = offset; 	/* right offset: bit position in buf where this code starts */
	int	bits = n_bits;		/* bits to go */
	char	*bp = buf;		/* buffer pointer */
	int	n;			/* index */

	if (code >= 0) {			/* if a real code */
		/*
		 * Get to the first byte.
		 */
		bp += (r_off >> 3);
		r_off &= 7;

		/*
		 * Since code is always >= 8 bits, only need to mask the first
		 * hunk on the left.
		 *
		 * The low r_off bits already stored in *bp are kept; the low
		 * bits of the code are placed above them.
		 */
		*bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
		bp++;
		bits -= (8 - r_off);
		code >>= (8 - r_off);

		/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
		if (bits >= 8) {
			*bp++ = code;
			code >>= 8;
			bits -= 8;
		}

		/* Last bits. */
		if (bits)
			*bp = code;

		offset += n_bits;

		/* Eight codes of n_bits bits fill exactly n_bits bytes:
		 * the buffer is full, so write it out and start over.
		 */
		if (offset == (n_bits << 3)) {
			bp = buf;
			bits = n_bits;
			bytes_out += bits;
			do
				putc_pak(*bp++, t);
			while (--bits);
			offset = 0;
		}

		/*
		 * If the next entry is going to be too big for the code size,
		 * then increase it, if possible.
		 */
		if (free_ent > maxcode || clear_flg > 0) {
		/*
		 * Write the whole buffer, because the input side won't
		 * discover the size increase until after it has read it.
		 * All n_bits bytes go out even if only part of the buffer
		 * holds codes.
		 */
			if (offset > 0) {
				bp = buf;	/* reset pointer for writing */
				bytes_out += n = n_bits;
				while (n--)
					putc_pak(*bp++, t);
			}
			offset = 0;

			if (clear_flg) {		/* reset if clearing */
				maxcode = MAXCODE(n_bits = INIT_BITS);
				clear_flg = 0;
			} else {			/* else use more bits */
				n_bits++;
				if (n_bits == BITS)
					maxcode = maxcodemax;
				else
					maxcode = MAXCODE(n_bits);
			}
		}
	}
	  else {					/* dump the buffer on EOF */
		/* only the bytes that actually hold code bits are written;
		 * bp still points at the start of buf here
		 */
		bytes_out += n = (offset + 7) / 8;

		if (offset > 0)
			while (n--)
				putc_pak(*bp++, t);
		offset = 0;
	}
}


/*****************************************************************
 *
 * Read one code from the input file.  If EOF, return -1.
 * Inputs:
 *	f:	the file to read packed bytes from (via getc_unp)
 * Outputs:
 *	code or -1 is returned.
 */

static int	getcode(f)	/* get a code */
FILE *f;			/* file to get from */
{
	int		code;
	/* These persist between calls and hide the file-level offset used
	 * by putcode().  offset is the bit position of the next code in
	 * buf; size is the bit position at or beyond which no complete
	 * code starts.
	 */
	static int	offset = 0, size = 0;
	int		r_off, bits;
	unsigned char	*bp = buf;

	/* Refill when a clear is pending, the buffer is used up, or the
	 * code size has to grow.
	 */
	if (clear_flg > 0 || offset >= size || free_ent > maxcode) {	/*
	  * If the next entry will be too big for the current code
	  * size, then we must increase the size.  This implies reading
	  * a new buffer full, too.
		  */
		if (free_ent > maxcode) {
			n_bits++;
			if (n_bits == BITS)
				maxcode = maxcodemax;	/* won't get any bigger now */
			else
				maxcode = MAXCODE(n_bits);
		}
		if (clear_flg > 0) {
			maxcode = MAXCODE(n_bits = INIT_BITS);
			clear_flg = 0;
		}

		/* read up to n_bits bytes, i.e. up to eight codes */
		for (size = 0; size < n_bits; size++) {
			if ((code = getc_unp(f)) == EOF)
				break;
			else
				buf[size] = code;
		}
		/* size stays 0 here, so the next call after an end of file
		 * refills again rather than reusing stale buffer contents
		 */
		if (size <= 0)
			return - 1;			/* end of file */

		offset = 0;
		/* Round size down to integral number of codes */
		size = (size << 3) - (n_bits - 1);
	}
	r_off = offset;
	bits = n_bits;

	/*
	 * Get to the first byte.
	 */
	bp += (r_off >> 3);
	r_off &= 7;

	/* Get first part (low order bits) */
	code = (*bp++ >> r_off);
	bits -= 8 - r_off;
	r_off = 8 - r_off;			/* now, offset into code word */

	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
	if (bits >= 8) {
		code |= *bp++ << r_off;
		r_off += 8;
		bits -= 8;
	}
	/* high order bits. */
	code |= (*bp & rmask[bits]) << r_off;
	offset += n_bits;

	return code;
}


/*
 * compress a file
 *
 * Algorithm:  use open addressing double hashing (no chaining) on the
 * prefix code / next character combination.  We do a variant of Knuth's
 * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
 * secondary probe.  Here, the modular division first probe gives way
 * to a faster exclusive-or manipulation.  Also do block compression with
 * an adaptive reset, where the code table is cleared when the compression
 * ratio decreases, but after the table fills.	The variable-length output
 * codes are re-sized at this point, and a special CLEAR code is generated
 * for the decompressor.
 */

init_cm(f, t)			/* get ready to crunch a new file */
FILE *f;			/* file we will be compressing (not used here) */
FILE *t;			/* where we will put it */
{
	/* byte counters and ratio tracking */
	in_count = 1;
	bytes_out = 1;
	checkpoint = CHECK_GAP;
	ratio = 0;

	/* code output state: empty buffer, smallest code size */
	offset = 0;
	clear_flg = 0;
	n_bits = INIT_BITS;
	maxcode = MAXCODE(INIT_BITS);

	/* empty string table */
	free_ent = FIRST;
	setmem(htab, HSIZE * sizeof(long), 0xff);

	/* the stream starts with our maximum code length */
	putc_pak(BITS, t);

	/* the next byte given to putc_cm() is the first of the file */
	firstcmp = 1;
}


putc_cm(c, t)			/* compress a character */
unsigned char	c;		/* character to compress */
FILE *t;			/* where to put it */
{
	static long	fcode;	/* table key: character and prefix code combined */
	static int	hshift;	/* shift applied to c when forming the first probe */
	int	i;		/* current slot in htab/codetab */
	int	disp;		/* step used when re-probing */

	if (firstcmp) {			/* special case for first byte */
		ent = c;		/* remember first byte */

		/* count the doublings of HSIZE needed to reach 65536 and
		 * shift by eight less that count
		 */
		hshift = 0;
		for (fcode = (long)HSIZE; fcode < 65536L; fcode *= 2L)
			hshift++;
		hshift = 8 - hshift;	/* set hash code range bound */

		firstcmp = 0;		/* no longer first */
		return;
	}

	in_count++;
	fcode = (long)(((long)c << BITS) + ent);
	i = (c << hshift) ^ ent;	/* xor hashing */

	if (htab[i] == fcode) {
		/* string + c is already in the table: extend the match */
		ent = codetab[i];
		return;
	} else if (htab[i] < 0) 	/* empty slot */
		goto nomatch;
	disp = HSIZE - i;		/* secondary hash (after G.Knott) */
	if (i == 0)
		disp = 1;

probe:
	/* step backwards by disp, wrapping around the table */
	if ((i -= disp) < 0)
		i += HSIZE;

	if (htab[i] == fcode) {
		ent = codetab[i];
		return;
	}
	if (htab[i] > 0)
		goto probe;

nomatch:
	/* emit the code for the string so far and start a new one at c;
	 * i is left at the slot where the search stopped
	 */
	putcode(ent, t);
	ent = c;
	if (free_ent < maxcodemax) {
		codetab[i] = free_ent++;	/* code -> hashtable */
		htab[i] = fcode;
	} else if ((long int)in_count >= checkpoint)
		cl_block(t);	/* table is full: check the ratio */
}


long	pred_cm(t)			/* wrap up crunching of a file */
FILE *t;				/* where the packed data goes */
{
	long	total;			/* packed size to report */

	/* The string still being matched has not been written yet. */
	putcode(ent, t);

	/* A negative code makes putcode() dump what is left in its buffer. */
	putcode(-1, t);

	total = bytes_out;
	return total;
}


/*
 * Decompress a file.  This routine adapts to the codes in the file
 * building the string table on-the-fly; requiring no table to be stored
 * in the compressed file.  The tables used herein are shared with those of
 * the compress() routine.  See the definitions above.
 */

decomp(f, t)				/* decompress a file */
FILE *f;				/* file to read codes from */
FILE *t;				/* file to write text to */
{
	unsigned char	*stackp;	/* next free slot in stack[] */
	int		finchar;	/* first character of the last string output */
	int		code, oldcode, incode;

	/* the stream starts with the maximum code length used to pack it */
	if ((code = getc_unp(f)) != BITS)
#if LATTICE				/* + Suematsu */
		abort("File packed with %d bits, I can only handle %d", code,
		   BITS);
#else
	{
		printf("File packed with %d bits, I can only handle %d\n", code,
		   BITS);
		exit(1);
	}
#endif					/* - Suematsu */
	n_bits = INIT_BITS;		/* set starting code size */
	clear_flg = 0;

	/*
	 * As above, initialize the first 256 entries in the table.
	 */
	maxcode = MAXCODE(n_bits = INIT_BITS);
	for (code = 255; code >= 0; code--) {
		prefix[code] = 0;
		suffix[code] = (unsigned char) code;
	}
	free_ent = FIRST;

	finchar = oldcode = getcode(f);
	if (oldcode == -1)		/* EOF already? */
		return; 		/* Get out of here */
	putc_ncr((char)finchar, t);	/* first code must be 8 bits=char */
	stackp = stack;

	while ((code = getcode(f)) > -1) {
		if (code == CLEAR) {
			for (code = 255; code >= 0; code--)
				prefix[code] = 0;
			clear_flg = 1;	/* next getcode() drops back to INIT_BITS */
			/* one below FIRST: the new-entry step at the bottom
			 * of this loop advances it to FIRST
			 */
			free_ent = FIRST - 1;
			if ((code = getcode(f)) == -1)	/* O, untimely death! */
				break;
		}
		incode = code;
		/*
		 * Special case for KwKwK string.
		 * The code is not in the table yet, so expand the previous
		 * string and append its own first character.
		 */
		if (code >= free_ent) {
			*stackp++ = finchar;
			code = oldcode;
		}

		/*
		 * Generate output characters in reverse order
		 * NOTE(review): nothing here checks stackp against the end
		 * of stack[] or code against the table size.
		 */
		while (code >= 256) {
			*stackp++ = suffix[code];
			code = prefix[code];
		}
		*stackp++ = finchar = suffix[code];

		/*
		 * And put them out in forward order
		 */
		do
			putc_ncr(*--stackp, t);
		while (stackp > stack);

		/*
		 * Generate the new entry.
		 */
		if ((code = free_ent) < maxcodemax) {
			prefix[code] = (unsigned short)oldcode;
			suffix[code] = finchar;
			free_ent = code + 1;
		}
		/*
		 * Remember previous code.
		 */
		oldcode = incode;
	}
}


/*************************************************************************
 * Please note how much trouble it can be to maintain upwards		 *
 * compatibility.  All that follows is for the sole purpose of unpacking *
 * files which were packed using an older method.			 *
 *************************************************************************/


/*  The h() pointer points to the routine to use for calculating a hash
    value.  It is set in the init routines to point to either of oldh()
    or newh().

    oldh() calculates a hash value by taking the middle twelve bits
    of the square of the key.

    newh() works somewhat differently, and was tried because it makes
    ARC about 23% faster.  This approach was abandoned because dynamic
    Lempel-Zev (above) works as well, and packs smaller also.  However,
    inadvertent release of a developmental copy forces us to leave this in.
*/

/* Set by init_ucr(); called through by hash() and unhash(). */
static unsigned (*h)(); 		/* pointer to hash function */

/*  Original hash: the middle twelve bits of the square of the key.
    Returns a string table slot in the range 0..4095.

    This code was written for 16 bit ints, where pred + foll wraps at
    0xFFFF (NO_PRED plus a character wraps to a small number).  Table
    slots are the codes stored in old archives, so the key is truncated
    to 16 bits explicitly to get the same slots whatever the width of
    int.  The square is taken in an unsigned long so it cannot overflow.
*/

static unsigned oldh(pred, foll)	/* old hash function */
unsigned int	pred;			/* code for preceding string */
unsigned char	foll;			/* value of following char */
{
	unsigned long	key;		/* hash key, then its square */

	key = ((pred + foll) | 0x0800) & 0xFFFF;	/* 16 bit hash key */
	key *= key;					/* at most 32 bits */
	return (unsigned) ((key >> 6) & 0x0FFF);	/* the middle 12 bits */
}


static unsigned newh(pred, foll)	/* multiplicative hash */
unsigned int	pred;			/* code for preceding string */
unsigned char	foll;			/* value of following char */
{
	unsigned int	key;		/* running hash value */

	key = pred + foll;		/* combine prefix and character */
	key *= 15073;			/* scramble */
	return key & 0xFFF;		/* keep the low twelve bits */
}


/*  The eolist() function is used to trace down a list of entries with
    duplicate keys until the last duplicate is found.
*/

static unsigned eolist(index)		/* walk to the end of a collision list */
unsigned int	index;			/* entry to start from */
{
	/* a next field of zero marks the last entry in the list */
	while (string_tab[index].next != 0)
		index = string_tab[index].next;

	return index;
}


/*  The hash() routine is used to find a spot in the hash table for a new
    entry.  It performs a "hash and linear probe" lookup, using h() to
    calculate the starting hash value and eolist() to perform the linear
    probe.  This routine DOES NOT detect a table full condition.  That
    MUST be checked for elsewhere.
*/

static unsigned hash(pred, foll)	/* pick a free slot for a new entry */
unsigned int	pred;			/* code for preceding string */
unsigned char	foll;			/* char following string */
{
	unsigned int	tail;		/* last entry of the collision list */
	unsigned int	slot;		/* candidate slot */

	slot = (*h)(pred, foll);	/* home slot for this key */

	if (!string_tab[slot].used)	/* nothing there yet, take it */
		return slot;

	/* A collision has occurred.  Find the end of the list hanging
	   off the home slot; the new entry will be linked in after it.
	 */
	tail = eolist(slot);

	/* Search for an unused entry, starting 101 places past the end
	   of the list and wrapping at the end of the table.  A full table
	   is not detected here.
	 */
	slot = (tail + 101) & 0x0FFF;
	while (string_tab[slot].used) {
		if (++slot == TABSIZE)
			slot = 0;
	}

	string_tab[tail].next = slot;	/* extend the collision list */

	return slot;
}


/*  The unhash() function is used to search the hash table for a given key.
    Like hash(), it performs a hash and linear probe search.  It returns
    either the number of the entry (if found) or NOT_FND (if not found).
*/

static unsigned unhash(pred, foll)	/* look a key up in the string table */
unsigned int	pred;			/* code of preceding string */
unsigned char	foll;			/* character following string */
{
	unsigned int	slot;		/* entry being examined */
	struct entry	*ep;		/* the same entry, as a pointer */

	slot = (*h)(pred, foll);	/* start at the home slot */

	for (;;) {
		ep = &string_tab[slot];

		if (ep->predecessor == pred && ep->follower == foll)
			return slot;	/* both halves of the key match */

		if (ep->next == 0)	/* end of the collision list */
			return NOT_FND;

		slot = ep->next;	/* try the next duplicate */
	}
}


/*  The init_tab() routine is used to initialize our hash table.
    You realize, of course, that "initialize" is a complete misnomer.
 */

static init_tab()			/* put the string table in its ground state */
{
	unsigned int	ch;		/* single byte value being entered */

	/* wipe every entry */
	setmem((char *)string_tab, sizeof(string_tab), 0);

	/* enter each one byte string, marked as having no predecessor */
	ch = 0;
	while (ch < 256) {
		upd_tab(NO_PRED, ch);
		ch++;
	}

	inbuf = EMPTY;			/* no nybble left over from a previous code */
}


/*  The upd_tab routine is used to add a new entry to the string table.
    As previously stated, no checks are made to ensure that the table
    has any room.  This must be done elsewhere.
*/

upd_tab(pred, foll)			/* enter a new string in the table */
unsigned int	pred;			/* code for preceding string */
unsigned int	foll;			/* character which follows string */
{
	unsigned int	slot;		/* where hash() says it goes */

	/* hash() also links the slot into its collision list if needed;
	   it does not check for a full table.
	 */
	slot = hash(pred, foll);

	string_tab[slot].predecessor = pred;
	string_tab[slot].follower = foll;
	string_tab[slot].next = 0;	/* nothing chained after it yet */
	string_tab[slot].used = TRUE;
}


/*  This algorithm encoded a file into twelve bit strings (three nybbles).
    The gocode() routine is used to read these strings a byte (or two)
    at a time.
*/

static gocode(fd)			/* fetch the next twelve bit code */
FILE *fd;				/* file to get code from */
{
	unsigned int	first, result;	/* byte just read, assembled code */
	int		boundary;	/* true when no nybble is pending */

	boundary = (inbuf == EMPTY);

	/* either way, the next thing needed is one more byte */
	if ((first = getc_unp(fd)) == EOF)
		return EOF;		/* pass back end of file status */
	first &= 0xFF;			/* mask down to true byte value */

	if (!boundary) {
		/* pending nybble is the top four bits, this byte the rest */
		result = first + ((inbuf << 8) & 0xF00);
		inbuf = EMPTY;		/* nothing left hanging */
		return result;
	}

	/* on a code boundary: a second byte supplies the low nybble of
	   this code and the first nybble of the next one
	 */
	if ((inbuf = getc_unp(fd)) == EOF)
		return EOF;		/* this should never happen */
	inbuf &= 0xFF;

	result = ((first << 4) & 0xFF0) + ((inbuf >> 4) & 0x00F);
	inbuf &= 0x000F;		/* keep the leftover nybble for next time */

	return result;
}


static push(c)				/* put a character on the stack */
int	c;				/* character to push */
{
	stack[sp] = (char) c;		/* store it as a char */
	sp++;

	/* give up once the pointer reaches TABSIZE */
	if (sp >= TABSIZE) {
#if LATTICE				/* + Suematsu */
		abort("Stack overflow\n");
#else
		printf("Stack overflow\n");
		exit(1);
#endif					/* - Suematsu */
	}
}


static int	pop()				/* take the top character off the stack */
{
	if (sp <= 0)
		return EMPTY;			/* nothing stacked */

	sp--;					/* sp now indexes the slot just freed */
	return (int) stack[sp];
}


/***** LEMPEL-ZEV DECOMPRESSION *****/

/* code_count is the number of free string table entries left; init_ucr()
 * sets it and getc_ucr() counts it down as entries are added.
 */
static int	code_count;		/* needed to detect table full */
/* NOTE(review): appears unused in this section; decomp() and getc_ucr()
 * each use a local named code instead.
 */
static unsigned code;			/* where we are so far */
static int	firstc; 		/* true only on first character */

init_ucr(new)				/* prepare to uncrunch a file */
int	new;				/* true to use new hash function */
{
	/* The hash function must be chosen first: init_tab() enters the
	   single byte strings through it.
	 */
	h = new ? newh : oldh;

	sp = 0; 			/* nothing on the stack */
	init_tab();			/* enter the 256 single byte strings */
	code_count = TABSIZE - 256;	/* entries still free after those */
	firstc = 1;			/* the next code read is the first */
}


/*  Returns the next byte of uncrunched data, or EOF when the crunched
    stream is exhausted.  Each code read from the file is expanded onto
    the stack back to front, and then handed out one byte per call.
*/

int	getc_ucr(f)			/* get next uncrunched byte */
FILE *f;				/* file containing crunched data */
{
	int		code, newcode;	/* code being expanded, code as read */
	static int	oldcode, finchar; /* previous code, first char of last string */
	struct entry	*ep;		/* allows faster table handling */

	if (firstc) {			/* first code is always known */
		firstc = FALSE; 	/* but next will not be first */

		/* An empty stream has no first code.  Report end of file
		   rather than using EOF as a table index.
		 */
		if ((oldcode = gocode(f)) == EOF)
			return EOF;
		return finchar = string_tab[oldcode].follower;
	}

	if (!sp) {			/* if stack is empty */
		if ((code = newcode = gocode(f)) == EOF)
			return EOF;

		ep = &string_tab[code]; /* initialize pointer */

		if (!ep->used) {		/* if code isn't known */
			/* It can only be the previous string followed by
			   that string's own first character.
			 */
			code = oldcode;
			ep = &string_tab[code];	/* re-initialize pointer */
			push(finchar);
		}

		while (ep->predecessor != NO_PRED) {
			push(ep->follower);	/* decode string backwards */
			code = ep->predecessor;
			ep = &string_tab[code];
		}

		push(finchar = ep->follower);	/* save first character also */

		/* The above loop will terminate, one way or another,
		   with string_tab[code].follower equal to the first
		   character in the string.
		 */

		if (code_count) { 		/* if room left in string table */
			upd_tab(oldcode, finchar);
			--code_count;
		}

		oldcode = newcode;
	}

	return pop();				/* return saved character */
}

